* Start with nothing in memory and make double the default storage type
* for every variable generated below.
clear
set type double
use "Newanes_timeseries_cdf.dta", clear

* Study year (from VCF0004) and the merge key id = year*10000 + VCF0006.
* Observations from the 1948 study are removed.
gen year = VCF0004
gen id = VCF0006 + 10000*year
drop if year==1948

** Merge the CDF with the variables from the Time Series Studies merge files
** created by "createmergevariables.do", one study year at a time.

* 1952 is merged first, without the update option.
merge year id using "Newmerge1952.dta", unique sort
tab _merge
drop _merge

* 1956 through 2012 (every fourth year) are merged with the update option;
* _merge is tabulated and dropped after each file so the next merge can
* create it again.
forvalues yr = 1956(4)2012 {
   merge year id using "Newmerge`yr'.dta", unique sort update
   tab _merge
   drop _merge
}

** Combine the 2012 variables on education, age, female, black, pidstrength,
** polsoph, and importantdiffs with the CDF variables.
** Pattern used for each variable: park the merged-in variable under the
** scratch name x12, rebuild the variable from its CDF source, then overwrite
** the 2012 rows with the parked values.

* education: VCF0110 with 0 set to missing and 1-4 shifted down to 0-3
rename education x12
gen education = VCF0110
recode education (0=.) (1=0) (2=1) (3=2) (4=3)
replace education = x12 if year==2012
drop x12

* age: VCF0101 with 0 set to missing
rename age x12
gen age = VCF0101 if VCF0101!=0
replace age = x12 if year==2012
drop x12

* female: 0 when VCF0104 is 1, 1 when VCF0104 is 2, missing otherwise
rename female x12
gen female = VCF0104 - 1 if inlist(VCF0104, 1, 2)
replace female = x12 if year==2012
drop x12

* black: 0 when VCF0105a is 1, 1 when VCF0105a is 2, missing otherwise
rename black x12
gen black = VCF0105a - 1 if inlist(VCF0105a, 1, 2)
replace black = x12 if year==2012
drop x12

* Party-identification strength. The merged-in pidstrength is parked in x12
* so the name can be reused for the measure derived from VCF0305.
gen x12 = pidstrength
drop pidstrength
* pidstrength: VCF0305 with 0 set to missing and 1-4 shifted down to 0-3
gen pidstrength = VCF0305
recode pidstrength 0=. 1=0 2=1 3=2 4=3
* pidstrength2: three-point version in which VCF0305 codes 2 and 3 share
* the value 1 (0 is again set to missing)
gen pidstrength2 = VCF0305
recode pidstrength2 0=. 1=0 2=1 3=1 4=2
* Only pidstrength2 receives the parked values for 2012; pidstrength keeps
* whatever VCF0305 provides for that year.
* NOTE(review): presumably the merged 2012 measure is on the 0-2 scale of
* pidstrength2 -- confirm against createmergevariables.do.
replace pidstrength2 = x12 if year==2012
drop x12

* polsoph: VCF0050a for every year except 2012, which takes the merged-in value
rename polsoph x12
gen polsoph = cond(year==2012, x12, VCF0050a)
drop x12

* Reverse the scale (1..5 becomes 4..0) and set 9 to missing. This runs after
* the 2012 values are filled in, so it is applied to them as well.
recode polsoph (1=4) (2=3) (3=2) (4=1) (5=0) (9=.)

* sophsplit: 1 for the two highest polsoph values (3 and 4), 0 for any other
* non-missing value
gen sophsplit = inlist(polsoph, 3, 4) if polsoph!=.

* importantdiffs: VCF0501 with 2 coded as 1, 1 and 9 coded as 0, and 0 set to
* missing; the 2012 rows take the merged-in value
rename importantdiffs x12
gen importantdiffs = VCF0501
recode importantdiffs (1 9 = 0) (2 = 1) (0 = .)
replace importantdiffs = x12 if year==2012
drop x12

* efficacy: VCF0648 divided by 100, with 999 treated as missing
gen efficacy = VCF0648/100 if VCF0648!=999

*** Where weight is missing, fall back to the pre-election weight (preweight)

replace weight = preweight if weight==.

** Create type of voter measure from pastvote and currentvote

* Only vote codes 0, 1 and 2 are classified. Any combination involving code 3
* (3rd party voters) is deliberately left missing.
*   0 = both votes are 0
*   1 = the same non-zero choice (1 or 2) both times
*   2 = switched between 1 and 2
*   3 = 0 in the past, 1 or 2 now
*   4 = 1 or 2 in the past, 0 now
gen catvoter = 0 if pastvote==0 & currentvote==0
replace catvoter = 1 if pastvote==currentvote & inlist(pastvote, 1, 2)
replace catvoter = 2 if (pastvote==1 & currentvote==2) | (pastvote==2 & currentvote==1)
replace catvoter = 3 if pastvote==0 & inlist(currentvote, 1, 2)
replace catvoter = 4 if inlist(pastvote, 1, 2) & currentvote==0

label define catvoter 0 "Repeat Non-voter" 1 "Standpatter" 2 "Floating Voter" 3 "Surger" 4 "Decliner"
label values catvoter catvoter
tab catvoter
tab catvoter if currentvote>0 & currentvote!=.


** catvoter2 is catvoter with surgers and decliners folded into one category (3)
gen catvoter2 = cond(catvoter==4, 3, catvoter)
label define catvoter2 0 "Repeat Non-voter" 1 "Standpatter" 2 "Floating Voter" 3 "Surge & Decliner"
label values catvoter2 catvoter2


** Add in year-level measures

* Re-election campaign indicator: 1 for the listed years, 0 for every other
* year (the list excludes 64 and 76)
gen reelectcamp = inlist(year, 1956, 1972, 1980, 1984, 1992, 1996, 2004, 2012)

* Add in DW-Nominate Polarization estimate, average of party mean 1st
* Dimension distance across House and Senate, calculated from Poole and
* Rosenthal file "Political Polarization Measures" downloaded from
* http://voteview.com/political_polarization.asp on September 15, 2013
*
* The variable starts out missing and is filled in by a year lookup; values
* are listed for every even year from 1952 to 2012. Any year not listed
* stays missing.

gen polarization = .
replace polarization = 0.389 if year==1952
replace polarization = 0.3975 if year==1954
replace polarization = 0.448 if year==1956
replace polarization = 0.4575 if year==1958
replace polarization = 0.488 if year==1960
replace polarization = 0.501 if year==1962
replace polarization = 0.522 if year==1964
replace polarization = 0.5315 if year==1966
replace polarization = 0.5065 if year==1968
replace polarization = 0.5135 if year==1970
replace polarization = 0.507 if year==1972
replace polarization = 0.549 if year==1974
replace polarization = 0.537 if year==1976
replace polarization = 0.5215 if year==1978
replace polarization = 0.535 if year==1980
replace polarization = 0.5575 if year==1982
replace polarization = 0.5785 if year==1984
replace polarization = 0.6065 if year==1986
replace polarization = 0.6195 if year==1988
replace polarization = 0.634 if year==1990
replace polarization = 0.6575 if year==1992
replace polarization = 0.705 if year==1994
replace polarization = 0.764 if year==1996
replace polarization = 0.8055 if year==1998
replace polarization = 0.806 if year==2000
replace polarization = 0.8275 if year==2002
replace polarization = 0.833 if year==2004
replace polarization = 0.8725 if year==2006
replace polarization = 0.883 if year==2008
replace polarization = 0.8985 if year==2010
replace polarization = 0.957 if year==2012

* % change per capita RDPI Q3 from Q3 prior year, calculated in q3annual.xls from bea.gov
* The variable starts out missing and is filled in by a year lookup; values
* are listed for every fourth year from 1952 to 2012. Any other year stays
* missing.
gen changerdpi = .
replace changerdpi = 1.312 if year==1952
replace changerdpi = 1.872 if year==1956
replace changerdpi = 0.686 if year==1960
replace changerdpi = 6.309 if year==1964
replace changerdpi = 3.383 if year==1968
replace changerdpi = 3.559 if year==1972
replace changerdpi = 2.260 if year==1976
replace changerdpi = -0.638 if year==1980
replace changerdpi = 6.497 if year==1984
replace changerdpi = 3.920 if year==1988
replace changerdpi = 3.159 if year==1992
replace changerdpi = 2.133 if year==1996
replace changerdpi = 4.641 if year==2000
replace changerdpi = 1.750 if year==2004
replace changerdpi = -0.360 if year==2008
replace changerdpi = 0.556 if year==2012

** Create mentions and ambivalence variables from likes/dislikes

* Each count is a copy of its CDF source with the value 9 set to missing.
* The two lists are paired by position: the k-th source feeds the k-th
* new variable (party items first, then candidate items).
local srcs VCF0314 VCF0315 VCF0318 VCF0319 VCF0401 VCF0402 VCF0405 VCF0406
local dsts demlikes demdislikes replikes repdislikes demcandlikes demcanddislikes repcandlikes repcanddislikes
forvalues k = 1/8 {
   local src : word `k' of `srcs'
   local dst : word `k' of `dsts'
   gen `dst' = `src' if `src'!=9
}

* Positive considerations for each side: likes of the side's party and
* candidate plus dislikes of the other side's party and candidate.
* Variable names are written out in full (demcandlikes, repcandlikes) rather
* than relying on Stata's variable-name abbreviation, which breaks under
* "set varabbrev off" or if another variable ever shares the prefix.
gen dempos = demlikes + repdislikes + demcandlikes + repcanddislikes
gen reppos = replikes + demdislikes + repcandlikes + demcanddislikes

* Candidate ambivalence: half of all candidate mentions minus the absolute
* difference between pro-Democratic mentions (demcandlikes + repcanddislikes)
* and pro-Republican mentions (repcandlikes + demcanddislikes).
gen candambiv = (demcandlikes + demcanddislikes + repcandlikes + repcanddislikes)/2 - abs(demcandlikes + repcanddislikes - repcandlikes - demcanddislikes)

* Total number of party and candidate mentions, top-coded at 20
gen totalmentions = demlikes + demdislikes + replikes + repdislikes + demcandlikes + demcanddislikes + repcandlikes + repcanddislikes
* since ambiv is determined by mentions
replace totalmentions = 20 if totalmentions>=20 & totalmentions!=.

* Ambivalent dummy for candidate ambivalence: 1 when candambiv is positive,
* 0 for any other non-missing value
sort year
gen ambivalent = (candambiv>0) if candambiv!=.
* Categorical version: -1 univalent (candambiv below 0); 0 indifferent
* (candambiv equal to 0); 1 ambivalent (candambiv above 0)
gen ambivcat = (candambiv>0) - (candambiv<0) if candambiv!=.

** Party ID measures
** Each dummy is defined (0) wherever VCF0305 is neither 0 nor missing and
** switched to 1 for the relevant VCF0305 code(s).

gen strongpartisan = 0 if VCF0305!=0 & VCF0305!=.
replace strongpartisan = 1 if VCF0305==4
* 2012: the merged-in strength measure is stored in pidstrength2 (0-2 scale,
* 2 = top category). The previous condition tested pidstrength==2, but
* pidstrength is the 0-3 recode of VCF0305, where 2 corresponds to
* VCF0305==3, i.e. the code used for weakpartisan below.
* NOTE(review): this only switches 2012 cases to 1; rows with VCF0305 missing
* are not set to 0 here -- confirm whether that is wanted.
replace strongpartisan = 1 if pidstrength2==2 & year==2012

gen weakpartisan = 0 if VCF0305!=0 & VCF0305!=.
replace weakpartisan = 1 if VCF0305==3

** independent measure with leaners (VCF0305 codes 1 and 2)
gen independent = 0 if VCF0305!=0 & VCF0305!=.
replace independent = 1 if VCF0305==1
replace independent = 1 if VCF0305==2
* indlean flags VCF0305 code 2 only
gen indlean= 0 if VCF0305!=0 & VCF0305!=.
replace indlean = 1 if VCF0305==2

** Split partisanship by independent/lean independent vs. weak/strong partisan
gen phalfsplit = 0 if independent!=.
replace phalfsplit = 1 if weakpartisan==1
replace phalfsplit = 1 if strongpartisan==1

** Recognition-of-differences scale: the average of issuevote and importantdiffs
gen sumissue = (issuevote + importantdiffs)/2

** Weighted national averages, by study year, used for mean centering
foreach stem in age education pidstrength polsoph {
   gen mean`stem' = .
}
sort year
set more off

* For each study year from 1952 to 2012 (every fourth year), store the
* weighted mean of each source variable in its mean* counterpart.
* Each pair is "source target"; note that meanpidstrength is based on
* pidstrength2.
forvalues yr = 1952(4)2012 {
   foreach pair in "age meanage" "education meaneducation" "pidstrength2 meanpidstrength" "polsoph meanpolsoph" {
      local src : word 1 of `pair'
      local avg : word 2 of `pair'
      sum `src' [iw=weight] if year==`yr'
      replace `avg' = r(mean) if year==`yr'
   }
}

** Year-mean centered versions: value minus that year's weighted mean
gen mc_age = age - meanage
gen mc_educ = education - meaneducation
gen mc_strength = pidstrength2 - meanpidstrength
gen mc_polsoph = polsoph - meanpolsoph

* Rescale polarization to run from 0 (minimum) to 1 (maximum), where the
* minimum and maximum are taken over observations with non-missing issuevote
sum polarization if issuevote!=.
tempname pol_lo pol_hi
scalar `pol_lo' = r(min)
scalar `pol_hi' = r(max)
replace polarization = (polarization - `pol_lo')/(`pol_hi' - `pol_lo')

* Interactions of rescaled polarization with the year-mean centered covariates
gen polage = mc_age*polarization
gen poleduc = mc_educ*polarization
gen polstrength = polarization*mc_strength
gen polarsoph = polarization*mc_polsoph

** impute missing important diffs measure for 56 (plus missing sub-samples in 72, 84, and 96)

* first estimate logit as function of issue awareness (issuevote), polarization, age, pidstrength, and education
logit importantdiffs issuevote mc_strength mc_age mc_educ polarization polstrength polage poleduc
predict hatimpdiff, pr

* now take random draw from predicted probability to impute value.
* One uniform number is drawn for every observation, in observation order,
* so rows with a valid prediction should receive the same draw as when the
* comparison was made inline -- NOTE(review): confirm against a prior run.
set seed 1000
gen double udraw = uniform()

* Stata treats missing as larger than any number, so "uniform()<=hatimpdiff"
* is true whenever hatimpdiff is missing; that used to impute a 1 for every
* row lacking a prediction. The draw is now defined only where a predicted
* probability exists and is missing otherwise.
gen drawimp = (udraw<=hatimpdiff) if hatimpdiff!=.

* modify rec. of differences scale (sumissue): fill in only rows that are
* still missing; rows without a draw stay missing because the sum is missing
replace sumissue = (drawimp+issuevote)/2 if sumissue==.

* Remove the scratch variables and keep the variables from year through
* polarsoph (in dataset order)
drop drawimp hatimpdiff udraw
keep year-polarsoph

save "Newmodifiedcdf_final.dta", replace
